# USGS Data Insights

import pandas as pd
import numpy as np
import datetime as dt

# Raw USGS earthquake catalog export (1960-2023). 'time' is forced to str so we
# control the parsing ourselves; low_memory=False reads each column in a single
# pass, which avoids the mixed-type DtypeWarning the chunked reader emitted.
csv_file = "All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep=',', lineterminator='\n',
                   dtype={'time': str}, low_memory=False)
usgs.head()
C:\Users\Vishal\AppData\Local\Temp\ipykernel_15396\3779433482.py:2: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
  usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
time latitude longitude depth mag magType nst gap dmin rms ... Unnamed: 991 Unnamed: 992 Unnamed: 993 Unnamed: 994 Unnamed: 995 Unnamed: 996 Unnamed: 997 Unnamed: 998 Unnamed: 999 \r
0 2023-10-12T21:41:21.480Z 36.466167 -120.875503 15.85 3.15 ml 60.0 135.0 0.1035 0.2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
1 2023-10-08T21:30:23.900Z 38.827167 -122.804 1.75 3.87 mw 108.0 20.0 0.006058 0.06 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
2 2023-10-05T03:09:58.000Z 35.041 -117.661 0.79 3.52 ml 63.0 40.0 0.1102 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
3 2023-10-01T19:29:36.760Z 40.2915 -124.2905 9.59 3.61 mw 40.0 115.0 0.0308 0.17 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
4 2023-10-01T15:41:29.620Z 40.295167 -124.287 9.8 4.09 mw 42.0 105.0 0.02685 0.17 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r

5 rows × 1001 columns

# Normalize 'time' to YYYY-MM-DD date strings; unparseable values coerce to NaT
# (and then to NaN after strftime).
usgs["time"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")

# Keep only events strictly between 1960-01-01 and 2023-01-01.
# Parse the column once and reuse it (the old code re-parsed it for each comparison).
event_dates = pd.to_datetime(usgs['time'])
usgs = usgs[(event_dates > pd.to_datetime('1960-01-01'))
            & (event_dates < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

# Spatial window: -123 < longitude < -113 and 29 < latitude < 39,
# applied as one combined mask instead of four sequential slices.
in_box = (
    (usgs['longitude'] > -123) & (usgs['longitude'] < -113)
    & (usgs['latitude'] > 29) & (usgs['latitude'] < 39)
)
usgs = usgs[in_box]

# Keep events at or above magnitude 3.4.
usgs = usgs[usgs['mag'] >= 3.4]
print(len(usgs))
11617
# Convert the date strings back to Timestamps in one vectorized call
# (replaces a Python-level per-row loop; same result, far faster).
usgs['time'] = pd.to_datetime(usgs['time'])

# Daily aggregates: number of events per day, and the largest magnitude per day.
usgs_grouped_counts = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('D')).mag.count())
usgs_grouped_counts.rename(columns={'mag': 'count'}, inplace=True)

usgs_grouped_max = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('D')).mag.max())
import plotly.express as px
import plotly.graph_objects as go

Number of earthquakes per day recorded in the USGS catalog

# Move the period index into a column of real timestamps so plotly gets a
# continuous time axis.
usgs_grouped_counts.reset_index(inplace=True)
usgs_grouped_counts['time'] = usgs_grouped_counts['time'].dt.to_timestamp()

# Daily event counts as an interactive line chart.
fig = px.line(
    usgs_grouped_counts,
    x='time',
    y='count',
    labels={'value': 'Magnitude'},
    title='USGS Earthquake Counts By Day',
)
fig.update_layout(width=1000, height=600, dragmode='pan')
fig.update_traces(line=dict(width=1.0))
fig.update_xaxes(rangeslider_visible=True)
fig.show()

Maximum magnitude of earthquakes recorded each day by USGS

# Period index -> timestamp column for a continuous time axis.
usgs_grouped_max.reset_index(inplace=True)
usgs_grouped_max['time'] = usgs_grouped_max['time'].dt.to_timestamp()

# Largest magnitude observed each day as an interactive line chart.
fig = px.line(
    usgs_grouped_max,
    x='time',
    y='mag',
    labels={'value': 'Magnitude'},
    title='USGS Earthquake Max By Day',
)
fig.update_layout(width=1000, height=600, dragmode='pan')
fig.update_traces(line=dict(width=1.0))
fig.update_xaxes(rangeslider_visible=True)
fig.show()
# Ten busiest days, ranked by event count.
usgs_grouped_counts_top10 = usgs_grouped_counts.nlargest(10, 'count')
usgs_grouped_counts_top10.head(10)
time count
5586 2019-07-06 316
3206 1992-06-28 123
865 1971-02-09 105
4004 1999-10-16 96
3433 1994-01-17 93
4881 2010-04-05 93
1869 1980-05-25 84
2611 1986-07-21 76
3207 1992-06-29 76
2243 1983-05-03 67
# Ten days with the largest daily-maximum magnitude.
usgs_grouped_max_top10 = usgs_grouped_max.nlargest(10, 'mag')
usgs_grouped_max_top10.head(10)
time mag
3206 1992-06-28 7.3
4880 2010-04-04 7.2
4004 1999-10-16 7.1
5586 2019-07-06 7.1
2939 1989-10-18 6.9
2242 1983-05-02 6.7
3433 1994-01-17 6.7
607 1968-04-09 6.6
865 1971-02-09 6.6
2753 1987-11-24 6.6
one_week = dt.timedelta(days=7)
usgs_grouped_counts_top10 = usgs_grouped_counts_top10.sort_values(by='time', ascending=True)
# BUG FIX: the original line sorted usgs_grouped_counts_top10 a second time and
# assigned the result to usgs_grouped_max_top10, silently replacing the
# top-10-magnitude table with count data. Sort the max table itself instead.
usgs_grouped_max_top10 = usgs_grouped_max_top10.sort_values(by='time', ascending=True)

# For each of the 10 busiest days, collect the daily counts within +/- 1 week.
# 'days_until_large_value' is the signed offset from the spike day
# (positive = days before the spike, negative = days after).
window_frames = []
for _, row in usgs_grouped_counts_top10.iterrows():
    spike_day = row['time']
    window_start = spike_day - one_week
    window_end = spike_day + one_week

    before = usgs_grouped_counts[
        (usgs_grouped_counts['time'] >= window_start)
        & (usgs_grouped_counts['time'] < spike_day)
    ].copy()
    before['days_until_large_value'] = (spike_day - before['time']).dt.days

    after = usgs_grouped_counts[
        (usgs_grouped_counts['time'] >= spike_day)
        & (usgs_grouped_counts['time'] <= window_end)
    ].copy()
    after['days_until_large_value'] = (spike_day - after['time']).dt.days

    window_frames.extend([before, after])

# Single concat at the end instead of growing a DataFrame inside the loop.
filtered_data_df = pd.concat(window_frames) if window_frames else pd.DataFrame()
# Scatter of daily counts vs. signed day-offset from each top-count spike day.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['count'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['count'],
        colorscale='Viridis',
        colorbar=dict(title='Number of Earthquakes'),
    ),
    text=filtered_data_df['time'],  # hover shows the actual date
))

fig.update_layout(
    title='Top 10 Highest Earthquake Counts, 1 Week Before/After',
    xaxis=dict(title='Days Until Large Value'),
    yaxis=dict(title='Number of Earthquakes'),
    width=1000,
    height=600,
    dragmode='pan',
)
# Final x-axis title overrides the one set in update_layout above.
fig.update_xaxes(title_text='Days Before/After Spike', rangeslider_visible=True)
fig.show()

The scatter above shows the daily earthquake counts within one week before and after each of the ten highest-count days; the x-axis is the signed number of days from the spike day (positive = before, negative = after), and hover text gives the actual date.

filtered_data_df = pd.DataFrame()

# For each of the 10 largest-magnitude days, collect the daily max magnitudes
# within +/- 1 week; offset is positive before the event, negative after.
window_frames = []
for _, row in usgs_grouped_max_top10.iterrows():
    event_day = row['time']
    window_start = event_day - one_week
    window_end = event_day + one_week

    before = usgs_grouped_max[
        (usgs_grouped_max['time'] >= window_start)
        & (usgs_grouped_max['time'] < event_day)
    ].copy()
    before['days_until_large_value'] = (event_day - before['time']).dt.days

    after = usgs_grouped_max[
        (usgs_grouped_max['time'] >= event_day)
        & (usgs_grouped_max['time'] <= window_end)
    ].copy()
    after['days_until_large_value'] = (event_day - after['time']).dt.days

    window_frames.extend([before, after])

if window_frames:
    filtered_data_df = pd.concat(window_frames)
# Scatter of daily max magnitude vs. signed day-offset from each large event.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['mag'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['mag'],
        colorscale='Viridis',
        colorbar=dict(title='Magnitude'),
    ),
    text=filtered_data_df['time'],  # hover shows the actual date
))

fig.update_layout(
    title='Top 10 Largest Earthquakes, 1 Week Before/After',
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Magnitude'),
    width=1000,
    height=600,
    dragmode='pan',
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

The scatter above shows the daily maximum magnitudes within one week before and after each of the ten largest earthquakes; the x-axis is the signed number of days from the event day (positive = before, negative = after).

filtered_data_df = pd.DataFrame()

# Individual event locations within +/- 1 week of each top-count day.
# 'time_group' labels which spike day a row belongs to (one plot trace per group).
window_frames = []
for _, row in usgs_grouped_counts_top10.iterrows():
    spike_day = row['time']
    window_start = spike_day - one_week
    window_end = spike_day + one_week

    before = usgs[(usgs['time'] >= window_start) & (usgs['time'] < spike_day)].copy()
    before['days_until_large_value'] = (spike_day - before['time']).dt.days
    before['time_group'] = str(spike_day)

    after = usgs[(usgs['time'] >= spike_day) & (usgs['time'] <= window_end)].copy()
    after['days_until_large_value'] = (spike_day - after['time']).dt.days
    after['time_group'] = str(spike_day)

    window_frames.extend([before, after])

if window_frames:
    filtered_data_df = pd.concat(window_frames)

Let's look at the locations of earthquakes during these spikes.

# Map-style scatter: one trace per spike day so the legend can toggle each
# event window independently.
fig = go.Figure()

# Shared color scale: signed day-offset from the spike, clipped to +/- 6 days.
color_scale_min = -6
color_scale_max = 6

for group_label in filtered_data_df['time_group'].unique():
    group_df = filtered_data_df[filtered_data_df['time_group'] == group_label]
    fig.add_trace(go.Scatter(
        x=group_df['longitude'],
        y=group_df['latitude'],
        mode='markers',
        marker=dict(
            size=group_df['mag'],       # marker size tracks magnitude
            sizemode='diameter',
            sizeref=0.4,
            color=group_df['days_until_large_value'],
            symbol='circle',
            colorbar=dict(
                tickfont=dict(size=12),
                x=0.5,
                y=-0.2,
                orientation='h',
                len=1.0,
                title='Days +/- Large Event',
            ),
            showscale=True,
            colorscale='Viridis',
            cmin=color_scale_min,
            cmax=color_scale_max,
        ),
        text=group_df['time'],
        name=str(group_label),
    ))

fig.update_layout(
    width=800,
    height=800,
    title='Earthquake Locations Before/After Large Count Of Earthquakes',
    xaxis=dict(title='Longitude'),
    yaxis=dict(title='Latitude'),
    dragmode='pan',
    legend=dict(title='Time Groups', font=dict(size=12)),
)

fig.show()
filtered_data_df = pd.DataFrame()

# Individual event locations within +/- 1 week of each large earthquake.
# 'time_group' labels which event day a row belongs to (one plot trace per group).
window_frames = []
for _, row in usgs_grouped_max_top10.iterrows():
    event_day = row['time']
    window_start = event_day - one_week
    window_end = event_day + one_week

    before = usgs[(usgs['time'] >= window_start) & (usgs['time'] < event_day)].copy()
    before['days_until_large_value'] = (event_day - before['time']).dt.days
    before['time_group'] = str(event_day)

    after = usgs[(usgs['time'] >= event_day) & (usgs['time'] <= window_end)].copy()
    after['days_until_large_value'] = (event_day - after['time']).dt.days
    after['time_group'] = str(event_day)

    window_frames.extend([before, after])

if window_frames:
    filtered_data_df = pd.concat(window_frames)
# Map-style scatter of events around each large earthquake; one trace per
# event window so the legend can toggle them independently.
fig = go.Figure()

# Shared color scale: signed day-offset from the event, clipped to +/- 6 days.
color_scale_min = -6
color_scale_max = 6

for group_label in filtered_data_df['time_group'].unique():
    group_df = filtered_data_df[filtered_data_df['time_group'] == group_label]
    fig.add_trace(go.Scatter(
        x=group_df['longitude'],
        y=group_df['latitude'],
        mode='markers',
        marker=dict(
            size=group_df['mag'],       # marker size tracks magnitude
            sizemode='diameter',
            sizeref=0.4,
            color=group_df['days_until_large_value'],
            symbol='circle',
            colorbar=dict(
                tickfont=dict(size=12),
                x=0.5,
                y=-0.2,
                orientation='h',
                len=1.0,
                title='Days +/- Large Event',
            ),
            showscale=True,
            colorscale='Viridis',
            cmin=color_scale_min,
            cmax=color_scale_max,
        ),
        text=group_df['time'],
        name=str(group_label),
    ))

fig.update_layout(
    width=800,
    height=800,
    title='Earthquake Locations Before/After Large Earthquake',
    xaxis=dict(title='Longitude'),
    yaxis=dict(title='Latitude'),
    dragmode='pan',
    legend=dict(title='Time Groups', font=dict(size=12)),
)

# Show the interactive plot
fig.show()

Let's take a look at the seismic energy released before days with a large number of earthquakes.

# Reload the raw catalog for the energy analysis. low_memory=False reads each
# column in one pass, avoiding the mixed-type DtypeWarning; this time 'time' is
# kept as a full (tz-aware) datetime rather than a date string.
csv_file = "All (1960-2023).csv"
usgs_energy = pd.read_csv(csv_file, sep=',', lineterminator='\n',
                          dtype={'time': str}, low_memory=False)
usgs_energy["time"] = pd.to_datetime(usgs_energy["time"], errors="coerce")
usgs_energy['mag'] = pd.to_numeric(usgs_energy['mag'], errors='coerce')
usgs_energy = usgs_energy[usgs_energy['mag'] >= 3.4]
usgs_energy.head()
C:\Users\Vishal\AppData\Local\Temp\ipykernel_15396\1523658885.py:2: DtypeWarning:

Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
time latitude longitude depth mag magType nst gap dmin rms ... Unnamed: 991 Unnamed: 992 Unnamed: 993 Unnamed: 994 Unnamed: 995 Unnamed: 996 Unnamed: 997 Unnamed: 998 Unnamed: 999 \r
1 2023-10-08 21:30:23.900000+00:00 38.827167 -122.804 1.75 3.87 mw 108.0 20.0 0.006058 0.06 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
2 2023-10-05 03:09:58+00:00 35.041 -117.661 0.79 3.52 ml 63.0 40.0 0.1102 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
3 2023-10-01 19:29:36.760000+00:00 40.2915 -124.2905 9.59 3.61 mw 40.0 115.0 0.0308 0.17 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
4 2023-10-01 15:41:29.620000+00:00 40.295167 -124.287 9.8 4.09 mw 42.0 105.0 0.02685 0.17 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
5 2023-09-30 17:16:47.830000+00:00 40.501 -124.4385 29.09 4.22 mw 40.0 221.0 0.08112 0.14 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r

5 rows × 1001 columns

# Keep events strictly inside the 1960-01-01 .. 2023-01-01 window.
usgs_energy = usgs_energy[(usgs_energy['time'] > '1960-01-01') & (usgs_energy['time'] < '2023-01-01')]

usgs_energy['longitude'] = pd.to_numeric(usgs_energy['longitude'], errors='coerce')
usgs_energy['latitude'] = pd.to_numeric(usgs_energy['latitude'], errors='coerce')

# Same spatial box as the main analysis: -123 < lon < -113 and 29 < lat < 39.
in_box = (
    (usgs_energy['longitude'] > -123) & (usgs_energy['longitude'] < -113)
    & (usgs_energy['latitude'] > 29) & (usgs_energy['latitude'] < 39)
)
usgs_energy = usgs_energy[in_box]

usgs_energy.head()
time latitude longitude depth mag magType nst gap dmin rms ... Unnamed: 991 Unnamed: 992 Unnamed: 993 Unnamed: 994 Unnamed: 995 Unnamed: 996 Unnamed: 997 Unnamed: 998 Unnamed: 999 \r
240 2022-12-31 12:12:26.650000+00:00 33.397500 -116.393333 3.88 4.14 mw 132.0 16.0 0.07391 0.19 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
241 2022-12-31 11:41:09.460000+00:00 34.355667 -116.921833 4.73 3.47 mw 121.0 25.0 0.07845 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
262 2022-12-17 11:39:42.860000+00:00 37.918167 -122.304000 5.48 3.57 mw 170.0 19.0 0.01598 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
267 2022-12-07 14:13:22.690000+00:00 35.508667 -118.391500 5.07 3.56 mw 58.0 21.0 0.1678 0.16 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r
270 2022-12-06 00:10:37.510000+00:00 35.935667 -120.012667 9.46 3.98 mw 66.0 75.0 0.08222 0.16 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \r

5 rows × 1001 columns

# Magnitude cutoff. Note: the identical >= 3.4 filter already ran right after
# loading, so this re-filter is a no-op safeguard.
usgs_energy = usgs_energy[usgs_energy['mag'].ge(3.4)]
print(len(usgs_energy))
11617
# Log-energy proxy per event. The original computed
#   log(10**(1.5*mag)) * (1/1.5)
# which simplifies exactly to mag * ln(10); computing it directly avoids the
# needless 10**x intermediate (and its overflow risk for extreme exponents).
usgs_energy['energy'] = usgs_energy['mag'] * np.log(10)
usgs_energy.head()
time latitude longitude depth mag magType nst gap dmin rms ... Unnamed: 992 Unnamed: 993 Unnamed: 994 Unnamed: 995 Unnamed: 996 Unnamed: 997 Unnamed: 998 Unnamed: 999 \r energy
240 2022-12-31 12:12:26.650000+00:00 33.397500 -116.393333 3.88 4.14 mw 132.0 16.0 0.07391 0.19 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 9.532702
241 2022-12-31 11:41:09.460000+00:00 34.355667 -116.921833 4.73 3.47 mw 121.0 25.0 0.07845 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 7.989970
262 2022-12-17 11:39:42.860000+00:00 37.918167 -122.304000 5.48 3.57 mw 170.0 19.0 0.01598 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 8.220229
267 2022-12-07 14:13:22.690000+00:00 35.508667 -118.391500 5.07 3.56 mw 58.0 21.0 0.1678 0.16 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 8.197203
270 2022-12-06 00:10:37.510000+00:00 35.935667 -120.012667 9.46 3.98 mw 66.0 75.0 0.08222 0.16 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 9.164289

5 rows × 1002 columns

from datetime import datetime

# The original round-tripped 'time' through strftime("%Y-%m-%d %H:%M:%S.%f%z")
# and back via strptime. That breaks on NaT values (strftime yields NaN, which
# strptime cannot parse) and is redundant: the column was already parsed with
# pd.to_datetime at load. A direct conversion is equivalent and NaT-safe.
usgs_energy['time'] = pd.to_datetime(usgs_energy['time'])
usgs_energy.head()
time latitude longitude depth mag magType nst gap dmin rms ... Unnamed: 992 Unnamed: 993 Unnamed: 994 Unnamed: 995 Unnamed: 996 Unnamed: 997 Unnamed: 998 Unnamed: 999 \r energy
240 2022-12-31 12:12:26.650000+00:00 33.397500 -116.393333 3.88 4.14 mw 132.0 16.0 0.07391 0.19 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 9.532702
241 2022-12-31 11:41:09.460000+00:00 34.355667 -116.921833 4.73 3.47 mw 121.0 25.0 0.07845 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 7.989970
262 2022-12-17 11:39:42.860000+00:00 37.918167 -122.304000 5.48 3.57 mw 170.0 19.0 0.01598 0.15 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 8.220229
267 2022-12-07 14:13:22.690000+00:00 35.508667 -118.391500 5.07 3.56 mw 58.0 21.0 0.1678 0.16 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 8.197203
270 2022-12-06 00:10:37.510000+00:00 35.935667 -120.012667 9.46 3.98 mw 66.0 75.0 0.08222 0.16 ... NaN NaN NaN NaN NaN NaN NaN NaN \r 9.164289

5 rows × 1002 columns

# Total log-energy per second-resolution period. Converting to PeriodIndex drops
# the timezone info (pandas warns about this), which is fine for plotting.
usgs_grouped_energy = pd.DataFrame(
    usgs_energy.groupby(usgs_energy['time'].dt.to_period('S')).energy.sum()
)
usgs_grouped_energy.reset_index(inplace=True)
usgs_grouped_energy.head()
C:\Users\Vishal\AppData\Local\Temp\ipykernel_15396\4149130055.py:1: UserWarning:

Converting to PeriodArray/Index representation will drop timezone information.
time energy
0 1960-01-02 22:51:45 9.302444
1 1960-01-07 17:51:32 8.381410
2 1960-01-11 19:08:39 8.726798
3 1960-01-20 03:25:53 11.512925
4 1960-01-26 04:17:36 11.282667
# Period column -> timestamps, then plot summed log-energy over time.
usgs_grouped_energy['time'] = usgs_grouped_energy['time'].dt.to_timestamp()

fig = px.line(
    usgs_grouped_energy,
    x='time',
    y='energy',
    labels={'value': 'energy'},
    title='USGS Energy',
)
fig.update_layout(width=1000, height=600, dragmode='pan')
fig.update_traces(line=dict(width=1.0))
fig.update_xaxes(rangeslider_visible=True)
fig.show()
filtered_data_df = pd.DataFrame()

# Per-event energy within +/- 1 week of each top-count day. Unlike the earlier
# windows, the offset is kept as a full Timedelta ('time_until_large_value')
# rather than reduced to whole days.
window_frames = []
for _, row in usgs_grouped_counts_top10.iterrows():
    spike_day = row['time']
    window_start = spike_day - one_week
    window_end = spike_day + one_week

    before = usgs_grouped_energy[
        (usgs_grouped_energy['time'] >= window_start)
        & (usgs_grouped_energy['time'] < spike_day)
    ].copy()
    before['time_until_large_value'] = spike_day - before['time']

    after = usgs_grouped_energy[
        (usgs_grouped_energy['time'] >= spike_day)
        & (usgs_grouped_energy['time'] <= window_end)
    ].copy()
    after['time_until_large_value'] = spike_day - after['time']

    window_frames.extend([before, after])

if window_frames:
    filtered_data_df = pd.concat(window_frames)
# Scatter of per-event log-energy vs. time-offset from each top-count day.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=filtered_data_df['time_until_large_value'],
    y=filtered_data_df['energy'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['energy'],
        colorscale='Viridis',
        colorbar=dict(title='Energy'),
    ),
    text=filtered_data_df['time'],  # hover shows the event timestamp
))

fig.update_layout(
    title='Top 10 Largest Earthquake Count Days, 1 Week Before/After (Energy)',
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Energy'),
    width=1000,
    height=600,
    dragmode='pan',
)

# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.show()